{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "路透社语料库中的前5个文件：\n"
     ]
    },
    {
     "ename": "LookupError",
     "evalue": "\n**********************************************************************\n  Resource \u001b[93mreuters\u001b[0m not found.\n  Please use the NLTK Downloader to obtain the resource:\n\n  \u001b[31m>>> import nltk\n  >>> nltk.download('reuters')\n  \u001b[0m\n  For more information see: https://www.nltk.org/data.html\n\n  Attempted to load \u001b[93mcorpora/reuters\u001b[0m\n\n  Searched in:\n    - 'C:\\\\Users\\\\Administrator/nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\Anaconda3\\\\nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\Anaconda3\\\\share\\\\nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\Anaconda3\\\\lib\\\\nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\AppData\\\\Roaming\\\\nltk_data'\n    - 'C:\\\\nltk_data'\n    - 'D:\\\\nltk_data'\n    - 'E:\\\\nltk_data'\n**********************************************************************\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mLookupError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\corpus\\util.py\u001b[0m in \u001b[0;36m__load\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     83\u001b[0m                 \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 84\u001b[1;33m                     \u001b[0mroot\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnltk\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"{self.subdir}/{zip_name}\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     85\u001b[0m                 \u001b[1;32mexcept\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\data.py\u001b[0m in \u001b[0;36mfind\u001b[1;34m(resource_name, paths)\u001b[0m\n\u001b[0;32m    582\u001b[0m     \u001b[0mresource_not_found\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34mf\"\\n{sep}\\n{msg}\\n{sep}\\n\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 583\u001b[1;33m     \u001b[1;32mraise\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresource_not_found\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    584\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mLookupError\u001b[0m: \n**********************************************************************\n  Resource \u001b[93mreuters\u001b[0m not found.\n  Please use the NLTK Downloader to obtain the resource:\n\n  \u001b[31m>>> import nltk\n  >>> nltk.download('reuters')\n  \u001b[0m\n  For more information see: https://www.nltk.org/data.html\n\n  Attempted to load \u001b[93mcorpora/reuters.zip/reuters/\u001b[0m\n\n  Searched in:\n    - 'C:\\\\Users\\\\Administrator/nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\Anaconda3\\\\nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\Anaconda3\\\\share\\\\nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\Anaconda3\\\\lib\\\\nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\AppData\\\\Roaming\\\\nltk_data'\n    - 'C:\\\\nltk_data'\n    - 'D:\\\\nltk_data'\n    - 'E:\\\\nltk_data'\n**********************************************************************\n",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mLookupError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-1-40601c08e0b6>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;31m#查看路透社语料库中的前5个文件\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'路透社语料库中的前5个文件：'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mreuters\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfileids\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'路透社语料库的类别：'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mreuters\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcategories\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m         \u001b[1;31m#查看路透社语料库的类别\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\corpus\\util.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(self, attr)\u001b[0m\n\u001b[0;32m    119\u001b[0m             \u001b[1;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"LazyCorpusLoader object has no attribute '__bases__'\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    120\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 121\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__load\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    122\u001b[0m         \u001b[1;31m# This looks circular, but its not, since __load() changes our\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    123\u001b[0m         \u001b[1;31m# __class__ to something new:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\corpus\\util.py\u001b[0m in \u001b[0;36m__load\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     84\u001b[0m                     \u001b[0mroot\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnltk\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"{self.subdir}/{zip_name}\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     85\u001b[0m                 \u001b[1;32mexcept\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 86\u001b[1;33m                     \u001b[1;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     87\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     88\u001b[0m         \u001b[1;31m# Load the corpus.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\corpus\\util.py\u001b[0m in \u001b[0;36m__load\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     79\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     80\u001b[0m             \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 81\u001b[1;33m                 \u001b[0mroot\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnltk\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"{self.subdir}/{self.__name}\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     82\u001b[0m             \u001b[1;32mexcept\u001b[0m \u001b[0mLookupError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     83\u001b[0m                 \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\data.py\u001b[0m in \u001b[0;36mfind\u001b[1;34m(resource_name, paths)\u001b[0m\n\u001b[0;32m    581\u001b[0m     \u001b[0msep\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"*\"\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m70\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    582\u001b[0m     \u001b[0mresource_not_found\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34mf\"\\n{sep}\\n{msg}\\n{sep}\\n\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 583\u001b[1;33m     \u001b[1;32mraise\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresource_not_found\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    584\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    585\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mLookupError\u001b[0m: \n**********************************************************************\n  Resource \u001b[93mreuters\u001b[0m not found.\n  Please use the NLTK Downloader to obtain the resource:\n\n  \u001b[31m>>> import nltk\n  >>> nltk.download('reuters')\n  \u001b[0m\n  For more information see: https://www.nltk.org/data.html\n\n  Attempted to load \u001b[93mcorpora/reuters\u001b[0m\n\n  Searched in:\n    - 'C:\\\\Users\\\\Administrator/nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\Anaconda3\\\\nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\Anaconda3\\\\share\\\\nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\Anaconda3\\\\lib\\\\nltk_data'\n    - 'C:\\\\Users\\\\Administrator\\\\AppData\\\\Roaming\\\\nltk_data'\n    - 'C:\\\\nltk_data'\n    - 'D:\\\\nltk_data'\n    - 'E:\\\\nltk_data'\n**********************************************************************\n"
     ]
    }
   ],
   "source": [
    "from nltk.corpus import reuters     #获取路透社语料库\n",
    "#查看路透社语料库中的前5个文件\n",
    "print('路透社语料库中的前5个文件：')\n",
    "print(reuters.fileids()[:5])\n",
    "print('路透社语料库的类别：')\n",
    "print(reuters.categories())         #查看路透社语料库的类别\n",
    "print('文件“test/14828”的所属类别：%s'%(reuters.categories(\"test/14828\"))) #获取文件“test/14828”的所属类别\n",
    "#查看类别\"crude\"和“cron”对应的文件\n",
    "print('类别“crude”和“cron”对应的文件:')\n",
    "print(reuters.fileids([\"crude\",\"cron\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
